****************************
**# A. Merge other datasets
****************************
* Base panel: the SIC87 dataset, restricted to the years 1970 through 1988.
use "processing/SIC87_Final_Dataset.dta", clear
keep if inrange(year, 1970, 1988)

* Routineness
*   Every industry in the panel must find a match (no _merge==1 allowed);
*   industries that appear only in the routineness file are discarded.
*   Author's counts: 459 sic inds in the rti data, 399 in the sic dataset,
*   so 60 rti-only industries (_merge==2).
mmerge sic using "processing/sic_routineness", type(n:1)
assert _merge != 1
keep if _merge != 2
drop _merge

* MFA shares
*   Same rule as above: panel industries must all match, using-only rows go.
*   Author's counts: 464 sic inds in the mfa data, 399 in the sic dataset,
*   so 65 mfa-only industries (_merge==2), of which only 5 are manufacturing.
mmerge sic using "processing/mfa_sic_shares", type(n:1)
assert _merge != 1
keep if _merge != 2
drop _merge

* Foreign tariff changes
*   Unmatched panel industries are tolerated here; they are only reported.
*   Author's counts: 4 panel industries without export tariff data (_merge==1);
*   440 sic inds in the export tariff data vs 399 in the sic dataset,
*   so 48 using-only industries (_merge==2), which are discarded.
mmerge sic using "processing/other_tariff_changes", type(n:1)
unique sic if _merge == 1
keep if _merge != 2
drop _merge

* Women's emp shares
*   Must be a perfect match in both directions.
mmerge sic using "processing/sic_women_shares", type(n:1)
assert _merge == 3
drop _merge

* Price growth
*   Must be a perfect match in both directions.
merge m:1 sic using "processing/sic_price_growth"
assert _merge == 3
drop _merge

* Automation
*   keep(1 3) retains panel industries that have no row in the automation
*   file, so automation78 can be missing after this merge.
merge m:1 sic using "raw/automation_sic_1947_1978", nogen keep(1 3)
rename automation automation78

*   Median of automation78 across the 1979 observations.
sum automation78 if year==1979 , d
loc med_auto78=r(p50)
di `med_auto78'

*   Diagnostics: how many 1979 observations are at or above the median,
*   out of how many 1979 observations in total.
count if automation78>=`med_auto78' & !missing(automation78) & year==1979
count if year==1979

*   Above-median indicator. Stata orders missing values above every number,
*   so an unguarded ">=" would flag industries with no automation data as
*   above-median; they are left missing instead.
gen automation=(automation78>=`med_auto78') if !missing(automation78)
tab automation

	
	
****************************************
**# B. Data for different time horizons
****************************************
* Declare the sic-by-year panel so the F./L. time-series operators can be used.
xtset sic year

* Move the tariff measures to a ln(1 + x) scale, overwriting them in place.
* ave_ols is the exception: it is rebuilt from tariffs/imports rather than
* from its own previous value.
replace ave_iv_swiss           = ln(1 + ave_iv_swiss)
replace ave_ols                = ln(1 + tariffs/imports)
replace ave_swiss              = ln(1 + ave_swiss)
replace ave_iv_upstream_impwt  = ln(1 + ave_iv_upstream_impwt)
replace ave_ols_upstream_impwt = ln(1 + ave_ols_upstream_impwt)

* Outcomes for which forward changes are built at every horizon.
loc horizon_outcomes ln_imp ln_exp ln_imp_pen ln_exp_ship imp_pen mat_ship ln_mat_ship ///
	ln_skill_pay_diff ln_skill_emp_diff ln_wsp exp_ship pay_ship ln_pay_ship ln_shipment

* For each horizon h = 1..9: value h years ahead minus the current value.
forvalues h = 1/9 {

	* Tariff measures (own and upstream), IV first, then OLS, then Swiss.
	gen ave_iv_ch`h'     = F`h'.ave_iv_swiss - ave_iv_swiss
	gen ave_iv_up_ch`h'  = F`h'.ave_iv_upstream_impwt - ave_iv_upstream_impwt

	gen ave_ols_ch`h'    = F`h'.ave_ols - ave_ols
	gen ave_ols_up_ch`h' = F`h'.ave_ols_upstream_impwt - ave_ols_upstream_impwt
	gen ave_swiss_ch`h'  = F`h'.ave_swiss - ave_swiss

	* Outcome variables, named <outcome>_ch_<h>.
	foreach y of local horizon_outcomes {
		gen `y'_ch_`h' = F`h'.`y' - `y'
	}
}


* Phase-in is complete by 1987, so the instrument over a 9-year window is
* set equal to the instrument over an 8-year window.
foreach stem in ave_iv ave_iv_up ave_swiss {
	replace `stem'_ch9 = `stem'_ch8
}

* Import and export data begin in 1972, so the lagged change is the 7-year
* change observed 7 years earlier, scaled by 9/7 into a 9-year equivalent so
* that the lagged tables are read on the same footing as the 9-year ones.
* The unsuffixed <outcome>_ch is a copy of the 9-year forward change.
foreach y in ln_imp ln_exp ln_imp_pen imp_pen ln_exp_ship exp_ship pay_ship ln_pay_ship ln_shipment mat_ship ln_mat_ship ln_skill_pay_diff ln_skill_emp_diff ln_wsp {
	gen lag_`y'_ch = (9/7) * L7.`y'_ch_7
	gen `y'_ch     = `y'_ch_9
}

* Lagged change in the (logged) tariff: current value minus value 7 years back.
gen lag_ave_ols_ch       = ave_ols - L7.ave_ols

* Changes between the _70 and _78 snapshot variables.
gen lag_invest_change    = ln_invest_78 - ln_invest_70
* NOTE(review): unlike its neighbours this one subtracts from the current-year
* ln_mat_ship rather than a _78 snapshot -- confirm that is intended.
gen lag_mat_ship_change  = ln_mat_ship - ln_mat_ship_70
gen lag_invest_def_ch    = ln_invest_def_78 - ln_invest_def_70

* Treat a missing sts as zero.
replace sts = 0 if sts == .


* exp(ln_exp) taken in 1979, spread to every observation of the industry.
egen exp79 = max(exp(ln_exp)*(year==1979)), by(sic)

* Overwrite sts with the within-industry maximum of sts*(year==1972), i.e.
* an industry-constant version anchored on the 1972 observation.
tempvar sts_at_year
gen `sts_at_year' = sts*(year==1972)
drop sts
egen sts = max(`sts_at_year'), by(sic)
drop `sts_at_year'

* Same construction anchored on the 1979 observation. It is applied to the
* sts just rebuilt above, which is already constant within industry.
gen `sts_at_year' = sts*(year==1979)
egen sts79 = max(`sts_at_year'), by(sic)
assert sts79 != .
drop `sts_at_year'

* rho = sts79 times x/(1+x), where x is ave_ols in 1979.
* NOTE(review): ave_ols was overwritten with ln(1+tariffs/imports) earlier in
* this section, so x here is the logged measure, whereas the label attached
* to rho reads STS*AVE/(1+AVE) -- confirm which of the two is intended.
gen ave79 = ave_ols*(year==1979)
egen ave_m79 = max((ave79)/(1+ave79)), by(sic)
gen rho = sts79*ave_m79

* Empty placeholder columns (std. error, coefficient, CI bounds).
foreach slot in se beta ub01 lb01 ub05 lb05 ub10 lb10 {
	gen `slot' = .
}


* LaTeX variable labels. Every dollar sign is written as \$ so that Stata
* stores a literal "$" instead of attempting global-macro expansion.

* Tariff changes over the 9-year window
label var ave_ols_ch9 "\$ \Delta \ln\left(1+AVE_{i}\right)  \$"
label var ave_ols_up_ch9 "\$ \Delta  \ln\left(1+AVE_{i}^{Up}\right)  \$"

label var other_tariff_change "\$ \Delta {AVE_{i}}^{Exports}  \$"

* Lagged tariff / price / trade controls
* (the closing ")" after \right and the escaped closing \$ were missing here,
*  which produced a label LaTeX cannot typeset)
label var lag_ave_ols_ch  "\$ \Delta \ln\left(1+\widehat{AVE}_{i}\right) \$"
label var dln_pstar_vw_7279_alt  "\$\Delta \ln(p^*_{i,t-1})\$"
label var lag_ln_imp_ch  "\$\Delta  \ln(Imports)_{i}\$" 
label var rho 		  "\$ STS_{i}*\frac{AVE_{i}}{1+AVE_{i}}\$" 
	
label var lag_imp_pen_ch   "\$  \Delta \text{Imp. Pen.}_{i}  \$"
label var lag_ln_exp_ch  	 "\$  \Delta\ln \left(Exports_{i} \right) \$"
label var lag_exp_ship_ch   "\$  \Delta \frac{Exports_{i}}{Shipments_{i}}    \$"


* Industry characteristics
label var lag_invest_change "\$ \Delta \ln(Investment_{i,t-1})\$"
label var ln_emp_78 "\$ \ln(Emp_{i})  \$"
label var skill_emp_share_78 "\$ \frac{Emp_{i}^{Non-Prod}}{Emp_{i}} \$"
label var rti "\$Routineness_{i}\$"
label var ln_invest_78 "\$ \ln(Investment_{i})  \$"
label var mfa_share "MFA \$\text{Share}_{i}\$"
label var women_share "\$ \frac{Emp_i^{Women}}{Emp_{i}} \$"

* ln_cap_lab_78 used to be labelled twice; only the later label ever took
* effect, so that one is kept and the overwritten one is removed.
* NOTE(review): the retained label shows the ratio without \ln although the
* variable name suggests a log -- confirm which text is wanted.
label var ln_cap_lab_78 "\$ \frac{Capital_{i}}{Labor_{i}} \$"

* MFA indicator: 1 when mfa_share is positive.
* (In Stata a missing mfa_share would also evaluate as > 0.)
gen  mfa = mfa_share>0
label var mfa "\$ I(MFA_i) \$ "

label var lag_invest_def_ch "\$ \Delta \ln(Investment_{i,t-1})    \$"
label var ln_invest_def_78 "\$ \ln(Investment_{i})  \$"
label var  mat_ship_def_78 "\$\frac{Materials_i}{Shipments_i}\$"
label var automation "Indicates above median automation78"
label var automation78 "\$ Automation_i\$"
 
 
**# C. Clean up 
drop ln_invest_def_1978 

* Groups of variables that make up the final dataset.
loc tariff_controls dln_pstar_vw_7279_alt mfa rho other_tariff_change
loc prod_controls 	ln_cap_lab_78 skill_emp_share_78 women_share mat_ship_def_78
loc rti_controls 	ln_invest_def_78 lag_invest_def_ch rti automation78 automation
* controls_1930 is defined here but is not part of the keep list below.
loc controls_1930 	hp_per_worker_1930 skilled_share_1930 female_share_1930 mat_ship_1930 invest_1930
* tariffs_c2_swiss is spelled out in full: it is referred to by that name
* when it is labelled further down, and the shorter "tariffs_c2" only kept it
* through Stata's variable-name abbreviation (which breaks under
* "set varabbrev off" or as soon as another tariffs_c2* variable exists).
loc tariffVars 		ave_ols ave_ols_ch* ave_iv_ch* ave_ols_up_ch9 ave_iv_up_ch9 ave_iv_swiss ave_swiss tariffs_c2_swiss
* Leading-wildcard patterns pick up both the 9-year change and its lag_ twin;
* trailing-wildcard patterns add the horizon-specific _ch_1 ... _ch_9 series.
loc tradeVars 		ln_imp imp_pen ln_exp exp_ship *ln_imp_ch *imp_pen_ch *ln_exp_ch *exp_ship_ch imports_c2 imports
loc tradeVars		`tradeVars' ln_imp_ch* imp_pen_ch* ln_exp_ch* exp_ship_ch*
loc empVars 		ln_skill_pay_diff*  ln_skill_emp_diff*  ln_wsp* pay skill_pay lag_ln_skill_pay_diff_ch
loc otherVars		lb* ub* beta se
keep sic year `tariff_controls' `prod_controls' `rti_controls' `tariffVars' `tradeVars' `empVars' `otherVars'

* The *exp_ship_ch and *imp_pen_ch wildcards above also match the ln_ versions
* of those series; remove them again.
drop lag_ln_exp_ship* ln_exp_ship* lag_ln_imp_pen* ln_imp_pen*

* Plain-text labels for the saved dataset.
label var sic "4-digit 1987 SIC industry"
label var imports "Imports from GLL import & tariff database"
label var tariffs_c2_swiss "IV implied calculated duties on importsGLL"

* Horizon-specific changes: horizon n is labelled as running from 1979 to 1979+n.
forvalues n=1/9 {
loc y1=1979+`n'
label var ln_imp_ch_`n'  	"Delta log imports 1979-`y1'"
label var imp_pen_ch_`n' 	"Delta import penetration 1979-`y1'"
label var ln_exp_ch_`n' 	"Delta log exports 1979-`y1'"
label var exp_ship_ch_`n'	"Delta exports/shipments 1979-`y1'"
label var ave_iv_ch`n'		"Delta AVE IV 1979-`y1'"
* (this label was previously assigned three times in a row; once is enough)
label var ave_ols_ch`n'		"Delta AVE 1979-`y1'"
label var ln_skill_pay_diff_ch_`n'	"Delta ln_skill_pay_diff 1979-`y1'"
label var ln_skill_emp_diff_ch_`n'	"Delta ln_skill_emp_diff 1979-`y1'"
label var ln_wsp_ch_`n'				"Delta ln_wsp 1979-`y1'"
}

* 9-year changes and levels
label var ave_iv_up_ch9 		"Delta AVE IV Upstream 1979-1988"
label var ln_skill_pay_diff_ch	"Delta ln_skill_pay_diff 1979-1988"
label var ln_skill_emp_diff_ch	"Delta ln_skill_emp_diff 1979-1988"
label var ln_wsp_ch				"Delta ln_wsp 1979-1988"
label var ln_imp_ch  	"Delta log imports 1979-1988"
label var imp_pen_ch 	"Delta import penetration 1979-1988"
label var ln_exp_ch 	"Delta log exports 1979-1988"
label var exp_ship_ch	"Delta exports/shipments 1979-1988"
label var ln_imp  		"Log imports 1979-1988"
label var imp_pen 		"Import penetration 1979-1988"
label var ln_exp 		"Log exports 1979-1988"
label var exp_ship		"Exports/shipments 1979-1988"
label var ave_iv_swiss	"Swiss IV (using col2 tariffs in Swiss formula)"
label var ave_swiss		"Swiss formula AVE (using AVE in Swiss formula)"

* Lagged (pre-period) changes
label var lag_ln_skill_pay_diff_ch	"Delta ln_skill_pay_diff 1972-1979"
label var lag_ln_imp_ch  	"Delta log imports 1972-1979"
label var lag_imp_pen_ch 	"Delta import penetration 1972-1979"
label var lag_ln_exp_ch 	"Delta log exports 1972-1979"
label var lag_exp_ship_ch	"Delta exports/shipments 1972-1979"

* The dollar sign is escaped: an unescaped "$1m" is read by Stata as a
* reference to a global macro and the amount would vanish from the label.
label var skill_pay		"Total non-production payroll in \$1m"
	
label var ln_skill_pay_diff	"\$ \Delta \ln(\frac{Pay_i^{Non-Prod}}{Pay_i^{Prod}}) \$"
label var ln_skill_emp_diff	"\$ \Delta \ln(\frac{Emp_i^{Non-Prod}}{Emp_i^{Prod}}) \$"
label var ln_wsp			"\$ \Delta \ln(\frac{Wage_i^{Non-Prod}}{Wage_i^{Prod}}) \$"
	
* Placeholder columns created empty earlier in the file
label var beta 	"Placeholder var for beta coefficient"
label var se 	"Placeholder var for beta std error"
label var lb01 	"Placeholder var for lower bound 99% CI"
label var lb05 	"Placeholder var for lower bound 95% CI"
label var lb10 	"Placeholder var for lower bound 90% CI"
label var ub01 	"Placeholder var for upper bound 99% CI"
label var ub05 	"Placeholder var for upper bound 95% CI"
label var ub10 	"Placeholder var for upper bound 90% CI"

 
* Write the regression dataset, overwriting any existing file.
save "analysis/SIC87_regression_Dataset.dta", replace